# gitHistoryManage.py (7.9 KB)
  1. import git
  2. import os
  3. from datetime import datetime, timezone
  4. from neo4j import GraphDatabase
  5. import scriptBase.base
  6. # --- Neo4j 連線資訊 ---
  7. # 請替換為你自己的 Neo4j 伺服器地址、用戶名和密碼
  8. NEO4J_URI = "bolt://99.12.23.49:7687"
  9. NEO4J_USER = "neo4j"
  10. NEO4J_PASSWORD = "1234.cc-"
  11. # --- 新增功能:清除 Neo4j 資料庫 ---
  12. def clear_neo4j_database():
  13. """
  14. 使用 Cypher 查詢清除 Neo4j 資料庫中的所有節點和關係。
  15. 警告:此操作不可逆!
  16. """
  17. try:
  18. driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
  19. with driver.session() as session:
  20. # Cypher 查詢:DETACH DELETE 會在刪除節點的同時,先刪除與之相關的所有關係
  21. print("正在清空 Neo4j 資料庫,此操作無法撤銷...")
  22. session.run("MATCH (n) DETACH DELETE n")
  23. print("資料庫已成功清空。")
  24. driver.close()
  25. except Exception as e:
  26. print(f"清空 Neo4j 失敗: {e}")
  27. # --- 核心函數: Git 歷史提取與 Neo4j 匯入 ---
  28. def import_git_to_neo4j(repo_path, since=None):
  29. """
  30. 提取 Git 倉庫的提交歷史,並直接匯入到 Neo4j 圖資料庫中。
  31. Args:
  32. repo_path (str): Git 倉庫的本地路徑。
  33. since (datetime, optional): 獲取提交的起始時間。如果為 None,則全量獲取。
  34. """
  35. # 驗證 Git 倉庫路徑
  36. if not os.path.exists(repo_path) or not git.repo.base.is_git_dir(os.path.join(repo_path, '.git')):
  37. print(f"錯誤: '{repo_path}' 不是一個有效的 Git 倉庫路徑。")
  38. return
  39. try:
  40. repo = git.Repo(repo_path)
  41. except git.InvalidGitRepositoryError:
  42. print(f"錯誤: '{repo_path}' 不是一個有效的 Git 倉庫。請檢查路徑。")
  43. return
  44. # 連接 Neo4j 資料庫
  45. try:
  46. driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
  47. print("成功連接到 Neo4j 資料庫。")
  48. except Exception as e:
  49. print(f"連接 Neo4j 失敗: {e}")
  50. return
  51. # 提交事務函數
  52. def import_transaction(tx, commit_data):
  53. # 創建或匹配作者節點
  54. tx.run("""
  55. MERGE (a:Author {email: $author_email})
  56. ON CREATE SET a.name = $author_name
  57. """, author_email=commit_data['author_email'], author_name=commit_data['author_name'])
  58. # 創建提交節點
  59. tx.run("""
  60. MERGE (c:Commit {id: $commit_id})
  61. ON CREATE SET c.message = $message, c.timestamp = $timestamp
  62. """, commit_id=commit_data['commit_id'], message=commit_data['commit_message'], timestamp=commit_data['committed_datetime'])
  63. # 創建作者與提交之間的關係
  64. tx.run("""
  65. MATCH (a:Author {email: $author_email})
  66. MATCH (c:Commit {id: $commit_id})
  67. MERGE (a)-[:AUTHORED]->(c)
  68. """, author_email=commit_data['author_email'], commit_id=commit_data['commit_id'])
  69. # 創建父提交關係
  70. if commit_data['parent_id']:
  71. tx.run("""
  72. MATCH (parent:Commit {id: $parent_id})
  73. MATCH (child:Commit {id: $child_id})
  74. MERGE (child)-[:PARENT]->(parent)
  75. """, parent_id=commit_data['parent_id'], child_id=commit_data['commit_id'])
  76. # 創建文件變更關係
  77. for file_change in commit_data['file_changes']:
  78. # 創建或匹配文件節點
  79. tx.run("""
  80. MERGE (f:File {path: $file_path})
  81. """, file_path=file_change['new_path'])
  82. # 創建提交與文件之間的修改關係
  83. tx.run("""
  84. MATCH (c:Commit {id: $commit_id})
  85. MATCH (f:File {path: $file_path})
  86. MERGE (c)-[:CHANGED {
  87. change_type: $change_type,
  88. lines_added: $lines_added,
  89. lines_deleted: $lines_deleted
  90. }]->(f)
  91. """,
  92. commit_id=commit_data['commit_id'],
  93. file_path=file_change['new_path'],
  94. change_type=file_change['change_type'],
  95. lines_added=file_change['lines_added'],
  96. lines_deleted=file_change['lines_deleted'])
  97. # 根據 'since' 參數選擇提交迭代器
  98. if since:
  99. print(f"正在進行增量匯入,從 {since.isoformat()} 開始...")
  100. since_utc = since.astimezone(timezone.utc)
  101. commits = repo.iter_commits('--all', after=since_utc)
  102. else:
  103. print("正在進行全量匯入...")
  104. commits = repo.iter_commits('--all')
  105. with driver.session() as session:
  106. for commit in commits:
  107. # 排除合併提交
  108. if len(commit.parents) > 1:
  109. continue
  110. file_changes = []
  111. try:
  112. diff_index = commit.diff(commit.parents[0] if commit.parents else git.NULL_TREE, create_patch=True)
  113. for diff in diff_index:
  114. diff_text = diff.diff.decode('utf-8', errors='ignore')
  115. lines_added = diff_text.count('\n+') - diff_text.count('\n+++')
  116. lines_deleted = diff_text.count('\n-') - diff_text.count('\n---')
  117. change_type = diff.change_type if diff.change_type is not None else ""
  118. old_path = diff.a_path if diff.a_path is not None else ""
  119. new_path = diff.b_path if diff.b_path is not None else ""
  120. appendStr = {
  121. "change_type": change_type,
  122. "old_path": old_path,
  123. "new_path": new_path,
  124. "lines_added": lines_added,
  125. "lines_deleted": lines_deleted,
  126. }
  127. file_changes.append(appendStr)
  128. except Exception as e:
  129. print(f"警告: 處理提交 {commit.hexsha} 的文件變更時發生錯誤: {e}")
  130. continue
  131. parent_id = commit.parents[0].hexsha if commit.parents else None
  132. commit_data = {
  133. "commit_id": commit.hexsha,
  134. "parent_id": parent_id,
  135. "author_name": commit.author.name,
  136. "author_email": commit.author.email,
  137. "committed_datetime": commit.committed_datetime.isoformat(),
  138. "commit_message": commit.message.strip(),
  139. "file_changes": file_changes
  140. }
  141. try:
  142. session.execute_write(import_transaction, commit_data)
  143. print(f"已匯入提交: {commit.hexsha[:7]}, commit_data: {commit_data}")
  144. except Exception as e:
  145. print(f"警告: 匯入提交 {commit.hexsha[:7]} 時發生錯誤: {e}, commit_data: {commit_data}")
  146. driver.close()
  147. print("匯入完成,Neo4j 連接已關閉。")
  148. # --- 執行腳本 ---
  149. if __name__ == "__main__":
  150. # 替換為你的 Git 倉庫路徑
  151. repo_path = 'D:/mayun/LR04.02_RVCTerminalPlus'
  152. # 步驟1: 清空資料庫
  153. # 執行全量匯入前,通常會先清空資料庫以避免資料重複
  154. print("----- 開始清空資料庫 -----")
  155. clear_neo4j_database()
  156. print("----- 資料庫清空完成 -----")
  157. # 步驟2:執行全量匯入
  158. print("\n" + "="*50 + "\n")
  159. print("----- 執行全量匯入 -----")
  160. import_git_to_neo4j(repo_path)
  161. print("----- 全量匯入完成 -----")
  162. # 示例2:執行增量匯入
  163. # 設置增量獲取的起始時間,例如從 2024年7月1日 開始
  164. # start_time = datetime(2024, 7, 1, 0, 0, 0, tzinfo=timezone.utc)
  165. # print("\n" + "="*50 + "\n")
  166. # print("----- 執行增量匯入 -----")
  167. # import_git_to_neo4j(repo_path, since=start_time)