"""Import the commit history of a local Git repository into a Neo4j graph.

Graph model written by this script:

    (:Author {email, name})-[:AUTHORED]->(:Commit {id, message, timestamp})
    (:Commit)-[:PARENT]->(:Commit)
    (:Commit)-[:CHANGED {change_type, lines_added, lines_deleted}]->(:File {path})
"""

import os
from datetime import datetime, timezone

import git
from neo4j import GraphDatabase

# Not referenced anywhere in this file; kept in case the import is needed for
# its side effects (TODO confirm, then remove if unused).
import scriptBase.base

# --- Neo4j connection settings ---
# Each value can be overridden through an environment variable of the same
# name. The literal fallbacks are the values that used to be hard-coded here.
# NOTE(review): credentials should not live in source control; prefer setting
# NEO4J_PASSWORD in the environment and rotating the committed password.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://99.12.23.49:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "1234.cc-")


# --- Database reset ---
def clear_neo4j_database():
    """Delete every node and relationship in the Neo4j database.

    WARNING: this operation is irreversible.

    Any failure (connection or query) is reported on stdout and swallowed so
    that the calling script keeps running, as before.
    """
    try:
        # Both context managers guarantee the session and the driver are
        # released even when the query raises.
        with GraphDatabase.driver(
            NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)
        ) as driver, driver.session() as session:
            print("正在清空 Neo4j 資料庫,此操作無法撤銷...")
            # DETACH DELETE removes a node's relationships before the node.
            # consume() waits for the query to finish, so the success message
            # below is only printed once the server has really completed it.
            session.run("MATCH (n) DETACH DELETE n").consume()
            print("資料庫已成功清空。")
    except Exception as e:
        print(f"清空 Neo4j 失敗: {e}")


# --- Helpers for the Git -> Neo4j import ---
def _count_changed_lines(diff_text):
    """Return ``(lines_added, lines_deleted)`` for one unified-diff patch.

    Only lines that appear after the first ``@@`` hunk header are counted, so
    ``---``/``+++`` file header lines (if present) are ignored, while content
    lines that themselves begin with ``++`` or ``--`` are still counted.
    """
    added = 0
    deleted = 0
    in_hunk = False
    # split("\n") rather than splitlines(): the latter also breaks on form
    # feeds and other separators that can legitimately occur inside a line.
    for line in diff_text.split("\n"):
        if line.startswith("@@"):
            in_hunk = True
        elif not in_hunk:
            continue
        elif line.startswith("+"):
            added += 1
        elif line.startswith("-"):
            deleted += 1
    return added, deleted


def _resolve_change_type(diff):
    """Return a one-letter change type (A/D/R/C/M) for a GitPython ``Diff``.

    When a diff is produced with ``create_patch=True`` GitPython may leave
    ``change_type`` unset, so it is derived from the boolean flags instead.
    """
    if diff.change_type:
        return diff.change_type
    if getattr(diff, "new_file", False):
        return "A"
    if getattr(diff, "deleted_file", False):
        return "D"
    if getattr(diff, "renamed_file", False):
        return "R"
    if getattr(diff, "copied_file", False):
        return "C"
    # Anything else listed in a patch is treated as a modification.
    return "M"


def _collect_file_changes(commit):
    """Return the list of per-file change records introduced by ``commit``.

    Each record is a dict with the keys ``change_type``, ``old_path``,
    ``new_path``, ``lines_added`` and ``lines_deleted``.
    """
    if commit.parents:
        # a.diff(b) describes the changes going from a to b, so the parent
        # must be on the left for additions/deletions to be reported from the
        # point of view of this commit.
        diff_index = commit.parents[0].diff(commit, create_patch=True)
    else:
        # Root commit: diff against the empty tree, every file is an addition.
        diff_index = commit.diff(git.NULL_TREE, create_patch=True)

    file_changes = []
    for diff in diff_index:
        raw_patch = diff.diff or b""
        if isinstance(raw_patch, bytes):
            diff_text = raw_patch.decode('utf-8', errors='ignore')
        else:
            diff_text = raw_patch
        lines_added, lines_deleted = _count_changed_lines(diff_text)
        file_changes.append({
            "change_type": _resolve_change_type(diff),
            "old_path": diff.a_path if diff.a_path is not None else "",
            "new_path": diff.b_path if diff.b_path is not None else "",
            "lines_added": lines_added,
            "lines_deleted": lines_deleted,
        })
    return file_changes


def _import_commit_tx(tx, commit_data):
    """Write one commit (author, commit, parent link, file changes) in ``tx``.

    Args:
        tx: Managed Neo4j transaction supplied by ``session.execute_write``.
        commit_data (dict): Record built in ``import_git_to_neo4j`` with the
            keys ``commit_id``, ``parent_id``, ``author_name``,
            ``author_email``, ``committed_datetime``, ``commit_message`` and
            ``file_changes``.
    """
    # Create or match the author node.
    tx.run(
        """
        MERGE (a:Author {email: $author_email})
        ON CREATE SET a.name = $author_name
        """,
        author_email=commit_data['author_email'],
        author_name=commit_data['author_name'],
    )

    # Create or match the commit node.
    tx.run(
        """
        MERGE (c:Commit {id: $commit_id})
        ON CREATE SET c.message = $message, c.timestamp = $timestamp
        """,
        commit_id=commit_data['commit_id'],
        message=commit_data['commit_message'],
        timestamp=commit_data['committed_datetime'],
    )

    # Link the author to the commit.
    tx.run(
        """
        MATCH (a:Author {email: $author_email})
        MATCH (c:Commit {id: $commit_id})
        MERGE (a)-[:AUTHORED]->(c)
        """,
        author_email=commit_data['author_email'],
        commit_id=commit_data['commit_id'],
    )

    # Link the commit to its parent. This MATCHes the parent node, so it only
    # creates the edge when the parent has already been imported; the caller
    # iterates commits parents-first to make that the case.
    if commit_data['parent_id']:
        tx.run(
            """
            MATCH (parent:Commit {id: $parent_id})
            MATCH (child:Commit {id: $child_id})
            MERGE (child)-[:PARENT]->(parent)
            """,
            parent_id=commit_data['parent_id'],
            child_id=commit_data['commit_id'],
        )

    # Link the commit to every file it touched.
    for file_change in commit_data['file_changes']:
        # Fall back to the old path so a change without a new path does not
        # get attached to a File node whose path is the empty string.
        file_path = file_change['new_path'] or file_change['old_path']

        # Create or match the file node.
        tx.run(
            """
            MERGE (f:File {path: $file_path})
            """,
            file_path=file_path,
        )

        # Create the CHANGED relationship carrying the change statistics.
        tx.run(
            """
            MATCH (c:Commit {id: $commit_id})
            MATCH (f:File {path: $file_path})
            MERGE (c)-[:CHANGED {
                change_type: $change_type,
                lines_added: $lines_added,
                lines_deleted: $lines_deleted
            }]->(f)
            """,
            commit_id=commit_data['commit_id'],
            file_path=file_path,
            change_type=file_change['change_type'],
            lines_added=file_change['lines_added'],
            lines_deleted=file_change['lines_deleted'],
        )


# --- Core function: Git history extraction and Neo4j import ---
def import_git_to_neo4j(repo_path, since=None):
    """Extract the commit history of a Git repository and load it into Neo4j.

    Merge commits (more than one parent) are skipped. Errors on a single
    commit are reported on stdout and the import continues with the next one.

    Args:
        repo_path (str): Local path of the Git repository.
        since (datetime, optional): Only commits after this moment are
            imported. If None, the full history is imported.
    """
    # Validate the repository path.
    if not os.path.exists(repo_path) or not git.repo.base.is_git_dir(os.path.join(repo_path, '.git')):
        print(f"錯誤: '{repo_path}' 不是一個有效的 Git 倉庫路徑。")
        return

    try:
        repo = git.Repo(repo_path)
    except git.InvalidGitRepositoryError:
        print(f"錯誤: '{repo_path}' 不是一個有效的 Git 倉庫。請檢查路徑。")
        return

    # Connect to Neo4j. Creating the driver does not open a connection by
    # itself, so verify_connectivity() is what actually proves the server is
    # reachable and the credentials are accepted.
    driver = None
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
        driver.verify_connectivity()
    except Exception as e:
        if driver is not None:
            driver.close()
        print(f"連接 Neo4j 失敗: {e}")
        return
    print("成功連接到 Neo4j 資料庫。")

    # Oldest-first, topologically ordered: every parent is yielded before its
    # children, so the parent node exists when the PARENT edge is created.
    rev_list_options = {"reverse": True, "topo_order": True}
    if since is not None:
        print(f"正在進行增量匯入,從 {since.isoformat()} 開始...")
        rev_list_options["after"] = since.astimezone(timezone.utc)
    else:
        print("正在進行全量匯入...")

    try:
        commits = repo.iter_commits('--all', **rev_list_options)
        with driver.session() as session:
            for commit in commits:
                # Skip merge commits.
                if len(commit.parents) > 1:
                    continue

                try:
                    file_changes = _collect_file_changes(commit)
                except Exception as e:
                    print(f"警告: 處理提交 {commit.hexsha} 的文件變更時發生錯誤: {e}")
                    continue

                parent_id = commit.parents[0].hexsha if commit.parents else None
                commit_data = {
                    "commit_id": commit.hexsha,
                    "parent_id": parent_id,
                    "author_name": commit.author.name,
                    "author_email": commit.author.email,
                    "committed_datetime": commit.committed_datetime.isoformat(),
                    "commit_message": commit.message.strip(),
                    "file_changes": file_changes,
                }

                try:
                    session.execute_write(_import_commit_tx, commit_data)
                    print(f"已匯入提交: {commit.hexsha[:7]}, commit_data: {commit_data}")
                except Exception as e:
                    print(f"警告: 匯入提交 {commit.hexsha[:7]} 時發生錯誤: {e}, commit_data: {commit_data}")
    finally:
        # Always release the driver, even if iterating the history fails.
        driver.close()

    print("匯入完成,Neo4j 連接已關閉。")


# --- Script entry point ---
if __name__ == "__main__":
    # Replace with the path of your Git repository.
    repo_path = 'D:/mayun/LR04.02_RVCTerminalPlus'

    # Step 1: wipe the database.
    # A full import normally starts from an empty database to avoid
    # duplicated data.
    print("----- 開始清空資料庫 -----")
    clear_neo4j_database()
    print("----- 資料庫清空完成 -----")

    # Step 2: run the full import.
    print("\n" + "="*50 + "\n")
    print("----- 執行全量匯入 -----")
    import_git_to_neo4j(repo_path)
    print("----- 全量匯入完成 -----")

    # Example 2: incremental import.
    # Choose the start time of the increment, e.g. from 2024-07-01 (UTC):
    # start_time = datetime(2024, 7, 1, 0, 0, 0, tzinfo=timezone.utc)
    # print("\n" + "="*50 + "\n")
    # print("----- 執行增量匯入 -----")
    # import_git_to_neo4j(repo_path, since=start_time)