|
@@ -0,0 +1,191 @@
|
|
|
+import git
|
|
|
+import os
|
|
|
+from datetime import datetime, timezone
|
|
|
+from neo4j import GraphDatabase
|
|
|
+import scriptBase.base
|
|
|
+
|
|
|
# --- Neo4j connection settings ---
# Each setting is read from the environment variable of the same name, so real
# credentials do not have to live in source control. The literals are only
# fallbacks used when the variable is unset; replace them with your own Neo4j
# server address, user name and password, or export the variables instead.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://99.12.23.49:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "1234.cc-")
|
|
|
+
|
|
|
+# --- 新增功能:清除 Neo4j 資料庫 ---
|
|
|
def clear_neo4j_database():
    """Delete every node and relationship in the configured Neo4j database.

    Warning: this operation is irreversible.

    Connects with NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD. Any failure is
    reported on stdout instead of being raised, so the caller keeps running.
    """
    try:
        # The context managers close the session and the driver even when the
        # query raises, so no connection is leaked on failure.
        with GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD)) as driver:
            with driver.session() as session:
                print("正在清空 Neo4j 資料庫,此操作無法撤銷...")
                # DETACH DELETE removes each node together with all of its
                # relationships. consume() forces the auto-commit query to
                # finish (and surface any server error) before success is
                # reported.
                session.run("MATCH (n) DETACH DELETE n").consume()
                print("資料庫已成功清空。")
    except Exception as e:
        print(f"清空 Neo4j 失敗: {e}")
|
|
|
+
|
|
|
+# --- 核心函數: Git 歷史提取與 Neo4j 匯入 ---
|
|
|
def _count_changed_lines(diff_text):
    """Return ``(added, deleted)`` line counts for one unified-diff patch text.

    Only lines inside hunks (after the first ``@@`` header) are counted, so
    ``---``/``+++`` file headers are never counted, while content lines that
    themselves start with ``++`` or ``--`` are counted correctly.
    """
    added = 0
    deleted = 0
    in_hunk = False
    # Split on '\n' only: str.splitlines() would also break on form feeds and
    # other separators that may legitimately occur inside file content.
    for line in diff_text.split('\n'):
        if line.startswith('@@'):
            in_hunk = True
        elif not in_hunk:
            continue
        elif line.startswith('+'):
            added += 1
        elif line.startswith('-'):
            deleted += 1
    return added, deleted


def _resolve_change_type(diff):
    """Return a one-letter change type (A/D/R/C/M) for a diff entry.

    ``diff.change_type`` may be unset when the diff was produced with
    ``create_patch=True``; in that case the type is derived from the entry's
    boolean flags instead of being stored as an empty string.
    """
    if diff.change_type:
        return diff.change_type
    if diff.new_file:
        return 'A'
    if diff.deleted_file:
        return 'D'
    if diff.renamed_file:
        return 'R'
    if getattr(diff, 'copied_file', False):
        return 'C'
    return 'M'


def _collect_file_changes(commit):
    """Build the list of per-file change dicts for a non-merge commit."""
    if commit.parents:
        # Diff FROM the parent TO the commit. commit.diff(parent) would give
        # the reverse direction, swapping added/deleted lines and old/new paths.
        diff_index = commit.parents[0].diff(commit, create_patch=True)
    else:
        # Root commit: compare against the empty tree.
        diff_index = commit.diff(git.NULL_TREE, create_patch=True)

    file_changes = []
    for diff in diff_index:
        patch = diff.diff or b''
        if isinstance(patch, bytes):
            patch = patch.decode('utf-8', errors='ignore')
        lines_added, lines_deleted = _count_changed_lines(patch)
        file_changes.append({
            "change_type": _resolve_change_type(diff),
            "old_path": diff.a_path if diff.a_path is not None else "",
            "new_path": diff.b_path if diff.b_path is not None else "",
            "lines_added": lines_added,
            "lines_deleted": lines_deleted,
        })
    return file_changes


def _import_commit_tx(tx, commit_data):
    """Write one commit (author, commit, parent link, file changes) in a transaction."""
    # Create or match the author node.
    tx.run("""
        MERGE (a:Author {email: $author_email})
        ON CREATE SET a.name = $author_name
    """, author_email=commit_data['author_email'], author_name=commit_data['author_name'])

    # Create the commit node.
    tx.run("""
        MERGE (c:Commit {id: $commit_id})
        ON CREATE SET c.message = $message, c.timestamp = $timestamp
    """, commit_id=commit_data['commit_id'], message=commit_data['commit_message'],
           timestamp=commit_data['committed_datetime'])

    # Link the author to the commit.
    tx.run("""
        MATCH (a:Author {email: $author_email})
        MATCH (c:Commit {id: $commit_id})
        MERGE (a)-[:AUTHORED]->(c)
    """, author_email=commit_data['author_email'], commit_id=commit_data['commit_id'])

    # Link the commit to its parent. The parent is MATCHed, not created, so
    # the link only appears when the parent node has already been imported
    # (skipped merge commits never get a node).
    if commit_data['parent_id']:
        tx.run("""
            MATCH (parent:Commit {id: $parent_id})
            MATCH (child:Commit {id: $child_id})
            MERGE (child)-[:PARENT]->(parent)
        """, parent_id=commit_data['parent_id'], child_id=commit_data['commit_id'])

    # Record every file touched by the commit.
    for file_change in commit_data['file_changes']:
        # Fall back to the old path so an entry without a new path does not
        # collapse onto a File node with an empty path.
        file_path = file_change['new_path'] or file_change['old_path']

        # Create or match the file node.
        tx.run("""
            MERGE (f:File {path: $file_path})
        """, file_path=file_path)

        # Create the commit -> file CHANGED relationship.
        tx.run("""
            MATCH (c:Commit {id: $commit_id})
            MATCH (f:File {path: $file_path})
            MERGE (c)-[:CHANGED {
                change_type: $change_type,
                lines_added: $lines_added,
                lines_deleted: $lines_deleted
            }]->(f)
        """,
               commit_id=commit_data['commit_id'],
               file_path=file_path,
               change_type=file_change['change_type'],
               lines_added=file_change['lines_added'],
               lines_deleted=file_change['lines_deleted'])


def import_git_to_neo4j(repo_path, since=None):
    """Extract the commit history of a Git repository and import it into Neo4j.

    For every non-merge commit reachable from any ref, this writes an Author
    node, a Commit node, an AUTHORED relationship, a PARENT relationship to
    the first parent (when that parent has been imported), and one CHANGED
    relationship per touched file. Merge commits are skipped.

    Errors are reported on stdout rather than raised; the function always
    returns None.

    Args:
        repo_path (str): Local path of the Git repository.
        since (datetime, optional): Only import commits after this time. If
            None, the full history is imported.
    """
    # Validate the repository path. git.Repo does the real validation.
    if not os.path.exists(repo_path):
        print(f"錯誤: '{repo_path}' 不是一個有效的 Git 倉庫路徑。")
        return

    try:
        repo = git.Repo(repo_path)
    except (git.InvalidGitRepositoryError, git.NoSuchPathError):
        print(f"錯誤: '{repo_path}' 不是一個有效的 Git 倉庫。請檢查路徑。")
        return

    # Connect to Neo4j. Creating the driver alone does not prove the server is
    # reachable, so verify connectivity before reporting success.
    try:
        driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
        try:
            driver.verify_connectivity()
        except Exception:
            driver.close()
            raise
    except Exception as e:
        print(f"連接 Neo4j 失敗: {e}")
        return
    print("成功連接到 Neo4j 資料庫。")

    try:
        # Oldest-first topological order guarantees a parent commit is written
        # before its children, so the PARENT MATCH can find the parent node.
        rev_list_options = {'topo_order': True, 'reverse': True}
        if since is not None:
            print(f"正在進行增量匯入,從 {since.isoformat()} 開始...")
            rev_list_options['after'] = since.astimezone(timezone.utc)
        else:
            print("正在進行全量匯入...")
        commits = repo.iter_commits('--all', **rev_list_options)

        with driver.session() as session:
            for commit in commits:
                # Skip merge commits.
                if len(commit.parents) > 1:
                    continue

                try:
                    file_changes = _collect_file_changes(commit)
                except Exception as e:
                    print(f"警告: 處理提交 {commit.hexsha} 的文件變更時發生錯誤: {e}")
                    continue

                commit_data = {
                    "commit_id": commit.hexsha,
                    "parent_id": commit.parents[0].hexsha if commit.parents else None,
                    "author_name": commit.author.name,
                    "author_email": commit.author.email,
                    "committed_datetime": commit.committed_datetime.isoformat(),
                    "commit_message": commit.message.strip(),
                    "file_changes": file_changes,
                }

                # One failing commit must not abort the whole import.
                try:
                    session.execute_write(_import_commit_tx, commit_data)
                    print(f"已匯入提交: {commit.hexsha[:7]}, commit_data: {commit_data}")
                except Exception as e:
                    print(f"警告: 匯入提交 {commit.hexsha[:7]} 時發生錯誤: {e}, commit_data: {commit_data}")
    finally:
        # Always release the driver, even if iteration or the session fails.
        driver.close()

    print("匯入完成,Neo4j 連接已關閉。")
|
|
|
+
|
|
|
+# --- 執行腳本 ---
|
|
|
if __name__ == "__main__":
    import sys

    # Repository to import: the first command-line argument when given,
    # otherwise the original default path.
    repo_path = sys.argv[1] if len(sys.argv) > 1 else 'D:/mayun/LR04.02_RVCTerminalPlus'

    # Step 1: clear the database.
    # A full import normally starts from an empty database to avoid
    # duplicated data.
    print("----- 開始清空資料庫 -----")
    clear_neo4j_database()
    print("----- 資料庫清空完成 -----")

    # Step 2: run the full import.
    print("\n" + "="*50 + "\n")
    print("----- 執行全量匯入 -----")
    import_git_to_neo4j(repo_path)
    print("----- 全量匯入完成 -----")

    # Example 2: run an incremental import.
    # Set the start time for the incremental fetch, e.g. from 2024-07-01:
    # start_time = datetime(2024, 7, 1, 0, 0, 0, tzinfo=timezone.utc)
    # print("\n" + "="*50 + "\n")
    # print("----- 執行增量匯入 -----")
    # import_git_to_neo4j(repo_path, since=start_time)
|