Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
L
LAE
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
文靖昊
LAE
Commits
27074f64
Commit
27074f64
authored
Jul 24, 2024
by
文靖昊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
去除混排,使用重排之后的前三个文档之和作为返回文档
parent
135a0fe7
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
26 deletions
+28
-26
get_similarity.py
src/server/get_similarity.py
+28
-26
No files found.
src/server/get_similarity.py
View file @
27074f64
...
@@ -81,44 +81,46 @@ class GetSimilarityWithExt:
...
@@ -81,44 +81,46 @@ class GetSimilarityWithExt:
return
result
return
result
def
get_rerank_with_doc
(
self
,
reranker
:
BgeRerank
,
split_docs_list
:
list
):
def
get_rerank_with_doc
(
self
,
reranker
:
BgeRerank
,
split_docs_list
:
list
):
top_k
=
self
.
get_doc_nums
(
len
(
split_docs_list
))
#
top_k = self.get_doc_nums(len(split_docs_list))
question
=
'
\n
'
.
join
(
self
.
question
)
question
=
'
\n
'
.
join
(
self
.
question
)
print
(
question
)
print
(
question
)
rerank_docs1_hash
=
[]
#
rerank_docs1_hash = []
rerank_docs2_hash
=
[]
#
rerank_docs2_hash = []
m
=
{}
#
m = {}
result
=
[]
result
=
[]
for
split_doc
in
split_docs_list
:
for
split_doc
in
split_docs_list
:
start
=
time
.
time
()
start
=
time
.
time
()
rerank_docs1
=
reranker
.
compress_documents
(
split_doc
,
question
)
rerank_docs1
=
reranker
.
compress_documents
(
split_doc
,
question
)
result
.
extend
(
rerank_docs1
[:
3
])
end
=
time
.
time
()
end
=
time
.
time
()
print
(
'重排1 time:
%
s Seconds'
%
(
end
-
start
))
print
(
'重排1 time:
%
s Seconds'
%
(
end
-
start
))
for
doc
in
rerank_docs1
:
#
for doc in rerank_docs1:
m
[
hash
(
doc
.
page_content
)]
=
doc
#
m[hash(doc.page_content)] = doc
rerank_docs1_hash
.
append
(
hash
(
doc
.
page_content
))
#
rerank_docs1_hash.append(hash(doc.page_content))
result
.
append
((
60
,
rerank_docs1_hash
))
#
result.append((60, rerank_docs1_hash))
start
=
time
.
time
()
start
=
time
.
time
()
rerank_docs2
=
reranker
.
compress_documents
(
self
.
similarity_docs
,
question
)
rerank_docs2
=
reranker
.
compress_documents
(
self
.
similarity_docs
,
question
)
result
.
extend
(
rerank_docs2
[:
3
])
end
=
time
.
time
()
end
=
time
.
time
()
print
(
'重排2 time:
%
s Seconds'
%
(
end
-
start
))
print
(
'重排2 time:
%
s Seconds'
%
(
end
-
start
))
for
doc
in
rerank_docs2
:
#
for doc in rerank_docs2:
m
[
hash
(
doc
.
page_content
)]
=
doc
#
m[hash(doc.page_content)] = doc
rerank_docs2_hash
.
append
(
hash
(
doc
.
page_content
))
#
rerank_docs2_hash.append(hash(doc.page_content))
#
result
.
append
((
55
,
rerank_docs2_hash
))
#
result.append((55,rerank_docs2_hash))
print
(
len
(
rerank_docs1_hash
))
#
print(len(rerank_docs1_hash))
print
(
len
(
rerank_docs2_hash
))
#
print(len(rerank_docs2_hash))
start
=
time
.
time
()
#
start = time.time()
rrf_doc
=
reciprocal_rank_fusion
(
result
)
#
rrf_doc = reciprocal_rank_fusion(result)
end
=
time
.
time
()
#
end = time.time()
print
(
'混排 time:
%
s Seconds'
%
(
end
-
start
))
#
print('混排 time: %s Seconds' % (end - start))
print
(
"混排文档数量:"
,
len
(
rrf_doc
))
#
print("混排文档数量:", len(rrf_doc))
d_list
=
[]
#
d_list = []
for
key
in
rrf_doc
:
#
for key in rrf_doc:
d_list
.
append
(
m
[
key
])
#
d_list.append(m[key])
print
(
"返回文档数量:"
,
top_k
)
#
print("返回文档数量:",top_k)
self
.
rerank_docs
=
d_list
[:
top_k
]
self
.
rerank_docs
=
result
return
self
.
join_document
(
d_list
[:
top_k
]
)
return
self
.
join_document
(
result
)
def
get_similarity_doc
(
self
):
def
get_similarity_doc
(
self
):
return
self
.
similarity_doc_txt
return
self
.
similarity_doc_txt
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment